from pyspark.sql import Row
from pyspark.sql.types import StructType, StructField, IntegerType, StringType
  
# Assume each line has the format: id,name,age,gender,hire_date
schema = StructType([  
    StructField("id", IntegerType(), True),  
    StructField("name", StringType(), True),  
    StructField("age", IntegerType(), True),  
    StructField("gender", StringType(), True),  
    StructField("hire_date", StringType(), True)  # 初始化为String，后面会转换为DateType  
])  
# Split each line once, then map it to a Row whose field names match the schema
rows_rdd = rdd.map(lambda line: line.split(",")).map(lambda f: Row(
    id=int(f[0]),
    name=f[1],
    age=int(f[2]),
    gender=f[3],
    hire_date=f[4]
))
# Convert the RDD to a DataFrame, applying the schema
df = spark.createDataFrame(rows_rdd, schema)   
df.printSchema()  
df.show(5)
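
# Sketch of the hire_date conversion mentioned above: cast the String column to a
# real date with to_date(). The "yyyy-MM-dd" pattern is an assumption; adjust it
# to whatever format the source data actually uses.
from pyspark.sql.functions import to_date, col

df = df.withColumn("hire_date", to_date(col("hire_date"), "yyyy-MM-dd"))
df.printSchema()  # hire_date should now appear as date instead of string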